import warnings
warnings.filterwarnings('ignore')
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.metrics import classification_report, confusion_matrix
train = pd.read_csv("linear-regression/train.csv")
test = pd.read_csv("linear-regression/test.csv")
train = train.dropna()
test = test.dropna()
train.head()
X_train = np.array(train.iloc[:, :-1].values)
y_train = np.array(train.iloc[:, 1].values)
X_test = np.array(test.iloc[:, :-1].values)
y_test = np.array(test.iloc[:, 1].values)
model = LinearRegression()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
accuracy = model.score(X_test, y_test)
plt.plot(X_train, model.predict(X_train), color='green')
plt.show()
print(accuracy)
import sklearn
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import r2_score
from statistics import mode
train = pd.read_csv("titanic/train.csv")
test = pd.read_csv('titanic/test.csv')
train.head()
ports = pd.get_dummies(train.Embarked , prefix='Embarked')
train = train.join(ports)
train.drop(['Embarked'], axis=1, inplace=True)
train.Sex = train.Sex.map({'male':0, 'female':1})
y = train.Survived.copy()
X = train.drop(['Survived'], axis=1)
X.drop(['Cabin'], axis=1, inplace=True)
X.drop(['Ticket'], axis=1, inplace=True)
X.drop(['Name'], axis=1, inplace=True)
X.drop(['PassengerId'], axis=1, inplace=True)
X.Age.fillna(X.Age.median(), inplace=True)
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=5)
from sklearn.linear_model import LogisticRegression
model = LogisticRegression(max_iter = 500000)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
accuracy = model.score(X_test, y_test)
print(accuracy)
confusion_matrix(y_test,y_pred)
classification_report(y_test,y_pred)
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=5)
from sklearn.gaussian_process import GaussianProcessClassifier
model = GaussianProcessClassifier()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
accuracy = model.score(X_test, y_test)
print(accuracy)
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.svm import SVC
data_svm = pd.read_csv("svm-classification/UniversalBank.csv")
data_svm.head()
X = data_svm.iloc[:,1:13].values
y = data_svm.iloc[:, -1].values
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25, random_state = 0)
classifier = SVC(kernel = 'rbf', random_state = 0)
classifier.fit(X_train, y_train)
y_pred = classifier.predict(X_test)
accuracies = cross_val_score(estimator = classifier, X = X_train, y = y_train, cv = 10)
accuracies.mean()
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.svm import NuSVC
nu_svm = pd.read_csv("svm-classification/UniversalBank.csv")
nu_svm.head()
X = nu_svm.iloc[:,1:13].values
y = nu_svm.iloc[:, -1].values
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25, random_state = 0)
classifier = NuSVC(kernel = 'rbf', random_state = 0)
classifier.fit(X_train, y_train)
y_pred = classifier.predict(X_test)
accuracies = cross_val_score(estimator = classifier, X = X_train, y = y_train, cv = 10)
accuracies.mean()
from sklearn.naive_bayes import GaussianNB
from sklearn.naive_bayes import BernoulliNB
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
data = pd.read_csv('classification-suv-dataset/Social_Network_Ads.csv')
data_nb = data
data_nb.head()
X = data_nb.iloc[:, [2,3]].values
y = data_nb.iloc[:, 4].values
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.20, random_state = 0)
sc_X = StandardScaler()
X_train = sc_X.fit_transform(X_train)
X_test = sc_X.transform(X_test)
classifier=GaussianNB()
classifier.fit(X_train,y_train)
y_pred=classifier.predict(X_test)
acc=accuracy_score(y_test, y_pred)
print(acc)
X = data_nb.iloc[:, [2,3]].values
y = data_nb.iloc[:, 4].values
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.20, random_state = 0)
sc_X = StandardScaler()
X_train = sc_X.fit_transform(X_train)
X_test = sc_X.transform(X_test)
classifier=BernoulliNB()
classifier.fit(X_train,y_train)
y_pred=classifier.predict(X_test)
acc=accuracy_score(y_test, y_pred)
print(acc)
from sklearn.neighbors import KNeighborsClassifier
knn = pd.read_csv("iris/Iris.csv")
knn.head()
X = knn.iloc[:, [1,2,3,4]].values
y = knn.iloc[:, 5].values
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.20, random_state = 0)
sc_X = StandardScaler()
X_train = sc_X.fit_transform(X_train)
X_test = sc_X.transform(X_test)
classifier=KNeighborsClassifier(n_neighbors=5,metric='minkowski',p=2)
classifier.fit(X_train,y_train)
y_pred=classifier.predict(X_test)
acc=accuracy_score(y_test, y_pred)
print(acc)
from sklearn.linear_model import Perceptron
from sklearn.neighbors import KNeighborsClassifier
p = pd.read_csv("iris/Iris.csv")
p.head()
X = p.iloc[:, [1,2,3,4]].values
y = p.iloc[:, 5].values
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.20, random_state = 0)
sc_X = StandardScaler()
X_train = sc_X.fit_transform(X_train)
X_test = sc_X.transform(X_test)
classifier=Perceptron()
classifier.fit(X_train,y_train)
y_pred=classifier.predict(X_test)
acc=accuracy_score(y_test, y_pred)
print(acc)
from sklearn.ensemble import RandomForestClassifier
rf = pd.read_csv("mushroom-classification/mushrooms.csv")
rf.head()
X = rf.drop('class', axis=1)
y = rf['class']
X = pd.get_dummies(X)
y = pd.get_dummies(y)
X_train, X_test, y_train, y_test = train_test_split(X, y)
model = RandomForestClassifier(n_estimators=100, max_depth=10, random_state=1)
model.fit(X_train, y_train)
model.score(X_test, y_test)
from sklearn.tree import DecisionTreeClassifier
dt = data
dt.head()
X = dt.iloc[:, [2,3]].values
y = dt.iloc[:, 4].values
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.20, random_state = 0)
sc_X = StandardScaler()
X_train = sc_X.fit_transform(X_train)
X_test = sc_X.transform(X_test)
classifier=DecisionTreeClassifier(criterion="entropy",random_state=0)
classifier.fit(X_train,y_train)
y_pred=classifier.predict(X_test)
acc=accuracy_score(y_test, y_pred)
print(acc)
from sklearn.ensemble import ExtraTreesClassifier
et = data
et.head()
X = et.iloc[:, [2,3]].values
y = et.iloc[:, 4].values
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.20, random_state = 0)
sc_X = StandardScaler()
X_train = sc_X.fit_transform(X_train)
X_test = sc_X.transform(X_test)
classifier=ExtraTreesClassifier(criterion="entropy",random_state=0)
classifier.fit(X_train,y_train)
y_pred=classifier.predict(X_test)
acc=accuracy_score(y_test, y_pred)
print(acc)
from sklearn.ensemble import AdaBoostClassifier
ac = data
ac.head()
X = ac.iloc[:, [2,3]].values
y = ac.iloc[:, 4].values
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.20, random_state = 0)
sc_X = StandardScaler()
X_train = sc_X.fit_transform(X_train)
X_test = sc_X.transform(X_test)
classifier=AdaBoostClassifier(random_state=0)
classifier.fit(X_train,y_train)
y_pred=classifier.predict(X_test)
acc=accuracy_score(y_test, y_pred)
print(acc)
from sklearn.linear_model import PassiveAggressiveClassifier
pac = data
pac.head()
X = pac.iloc[:, [2,3]].values
y = pac.iloc[:, 4].values
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.20, random_state = 0)
sc_X = StandardScaler()
X_train = sc_X.fit_transform(X_train)
X_test = sc_X.transform(X_test)
classifier=PassiveAggressiveClassifier(random_state=0)
classifier.fit(X_train,y_train)
y_pred=classifier.predict(X_test)
acc=accuracy_score(y_test, y_pred)
print(acc)
from sklearn.ensemble import BaggingClassifier
bc = data
bc.head()
X = bc.iloc[:, [2,3]].values
y = bc.iloc[:, 4].values
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.20, random_state = 0)
sc_X = StandardScaler()
X_train = sc_X.fit_transform(X_train)
X_test = sc_X.transform(X_test)
classifier=BaggingClassifier(random_state=0)
classifier.fit(X_train,y_train)
y_pred=classifier.predict(X_test)
acc=accuracy_score(y_test, y_pred)
print(acc)
from sklearn.ensemble import GradientBoostingClassifier
gb = data
gb.head()
X = gb.iloc[:, [2,3]].values
y = gb.iloc[:, 4].values
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.20, random_state = 0)
sc_X = StandardScaler()
X_train = sc_X.fit_transform(X_train)
X_test = sc_X.transform(X_test)
gbk = GradientBoostingClassifier()
gbk.fit(X_train, y_train)
pred = gbk.predict(X_test)
acc=accuracy_score(y_test, y_pred)
print(acc)
import lightgbm as lgbm
import lightgbm as lgb
import pandas as pd
from sklearn.model_selection import KFold, GridSearchCV
from sklearn import preprocessing
train = pd.read_csv("house-prices-advanced-regression-techniques/train.csv")
test = pd.read_csv("house-prices-advanced-regression-techniques/test.csv")
data = pd.concat([train, test], sort=False)
data = data.reset_index(drop=True)
data.head()
nans=pd.isnull(data).sum()
data['MSZoning'] = data['MSZoning'].fillna(data['MSZoning'].mode()[0])
data['Utilities'] = data['Utilities'].fillna(data['Utilities'].mode()[0])
data['Exterior1st'] = data['Exterior1st'].fillna(data['Exterior1st'].mode()[0])
data['Exterior2nd'] = data['Exterior2nd'].fillna(data['Exterior2nd'].mode()[0])
data["BsmtFinSF1"] = data["BsmtFinSF1"].fillna(0)
data["BsmtFinSF2"] = data["BsmtFinSF2"].fillna(0)
data["BsmtUnfSF"] = data["BsmtUnfSF"].fillna(0)
data["TotalBsmtSF"] = data["TotalBsmtSF"].fillna(0)
data["BsmtFullBath"] = data["BsmtFullBath"].fillna(0)
data["BsmtHalfBath"] = data["BsmtHalfBath"].fillna(0)
data["BsmtQual"] = data["BsmtQual"].fillna("None")
data["BsmtCond"] = data["BsmtCond"].fillna("None")
data["BsmtExposure"] = data["BsmtExposure"].fillna("None")
data["BsmtFinType1"] = data["BsmtFinType1"].fillna("None")
data["BsmtFinType2"] = data["BsmtFinType2"].fillna("None")
data['KitchenQual'] = data['KitchenQual'].fillna(data['KitchenQual'].mode()[0])
data["Functional"] = data["Functional"].fillna("Typ")
data["FireplaceQu"] = data["FireplaceQu"].fillna("None")
data["GarageType"] = data["GarageType"].fillna("None")
data["GarageYrBlt"] = data["GarageYrBlt"].fillna(0)
data["GarageFinish"] = data["GarageFinish"].fillna("None")
data["GarageCars"] = data["GarageCars"].fillna(0)
data["GarageArea"] = data["GarageArea"].fillna(0)
data["GarageQual"] = data["GarageQual"].fillna("None")
data["GarageCond"] = data["GarageCond"].fillna("None")
data["PoolQC"] = data["PoolQC"].fillna("None")
data["Fence"] = data["Fence"].fillna("None")
data["MiscFeature"] = data["MiscFeature"].fillna("None")
data['SaleType'] = data['SaleType'].fillna(data['SaleType'].mode()[0])
data['LotFrontage'].interpolate(method='linear',inplace=True)
data["Electrical"] = data.groupby("YearBuilt")['Electrical'].transform(lambda x: x.fillna(x.mode()[0]))
data["Alley"] = data["Alley"].fillna("None")
data["MasVnrType"] = data["MasVnrType"].fillna("None")
data["MasVnrArea"] = data["MasVnrArea"].fillna(0)
nans=pd.isnull(data).sum()
nans[nans>0]
_list = []
for col in data.columns:
if type(data[col][0]) == type('str'):
_list.append(col)
le = preprocessing.LabelEncoder()
for li in _list:
le.fit(list(set(data[li])))
data[li] = le.transform(data[li])
train, test = data[:len(train)], data[len(train):]
X = train.drop(columns=['SalePrice', 'Id'])
y = train['SalePrice']
test = test.drop(columns=['SalePrice', 'Id'])
kfold = KFold(n_splits=5, random_state = 2020, shuffle = True)
model_lgb = lgb.LGBMRegressor(objective='regression',num_leaves=5,
learning_rate=0.05, n_estimators=720,
max_bin = 55, bagging_fraction = 0.8,
bagging_freq = 5, feature_fraction = 0.2319,
feature_fraction_seed=9, bagging_seed=9,
min_data_in_leaf =6, min_sum_hessian_in_leaf = 11)
model_lgb.fit(X, y)
r2_score(model_lgb.predict(X), y)
import xgboost as xgb
#Data is used the same as LGB
X = train.drop(columns=['SalePrice', 'Id'])
y = train['SalePrice']
X.head()
model_xgb = xgb.XGBRegressor(colsample_bytree=0.4603, gamma=0.0468,
learning_rate=0.05, max_depth=3,
min_child_weight=1.7817, n_estimators=2200,
reg_alpha=0.4640, reg_lambda=0.8571,
subsample=0.5213, silent=1,
random_state =7, nthread = -1)
model_xgb.fit(X, y)
r2_score(model_xgb.predict(X), y)
from catboost import CatBoostRegressor
#Data is used the same as LGB
X = train.drop(columns=['SalePrice', 'Id'])
y = train['SalePrice']
X.head()
cb_model = CatBoostRegressor(iterations=500,
learning_rate=0.05,
depth=10,
random_seed = 42,
bagging_temperature = 0.2,
od_type='Iter',
metric_period = 50,
od_wait=20)
cb_model.fit(X, y)
r2_score(cb_model.predict(X), y)
from sklearn.linear_model import SGDRegressor
#Data is used the same as LGB
X = train.drop(columns=['SalePrice', 'Id'])
y = train['SalePrice']
X.head()
SGD = SGDRegressor(max_iter = 100)
SGD.fit(X, y)
r2_score(SGD.predict(X), y)
from sklearn.linear_model import ElasticNet, Lasso, BayesianRidge, LassoLarsIC
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import RobustScaler
#Data is used the same as LGB
X = train.drop(columns=['SalePrice', 'Id'])
y = train['SalePrice']
X.head()
lasso = make_pipeline(RobustScaler(), Lasso(alpha =0.0005, random_state=1))
lasso.fit(X, y)
r2_score(lasso.predict(X), y)
from sklearn.linear_model import RidgeClassifierCV
#Data is used the same as LGB
X = train.drop(columns=['SalePrice', 'Id'])
y = train['SalePrice']
X.head()
rcc = RidgeClassifierCV()
rcc.fit(X, y)
r2_score(rcc.predict(X), y)
from sklearn.kernel_ridge import KernelRidge
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import RobustScaler
#Data is used the same as LGB
X = train.drop(columns=['SalePrice', 'Id'])
y = train['SalePrice']
X.head()
KRR = KernelRidge(alpha=0.6, kernel='polynomial', degree=2, coef0=2.5)
KRR.fit(X, y)
r2_score(KRR.predict(X), y)
from sklearn.linear_model import BayesianRidge
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import RobustScaler
#Data is used the same as LGB
X = train.drop(columns=['SalePrice', 'Id'])
y = train['SalePrice']
X.head()
BR = BayesianRidge()
BR.fit(X, y)
r2_score(BR.predict(X), y)
from sklearn.linear_model import ElasticNet, Lasso, BayesianRidge, LassoLarsIC
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import RobustScaler
#Data is used the same as LGB
X = train.drop(columns=['SalePrice', 'Id'])
y = train['SalePrice']
X.head()
ENet = make_pipeline(RobustScaler(), ElasticNet(alpha=0.0005, l1_ratio=.9, random_state=3))
ENet.fit(X, y)
r2_score(ENet.predict(X), y)
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
lda = data
lda.head()
X = lda.iloc[:, [2,3]].values
y = lda.iloc[:, 4].values
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.20, random_state = 0)
sc_X = StandardScaler()
X_train = sc_X.fit_transform(X_train)
X_test = sc_X.transform(X_test)
Model=LinearDiscriminantAnalysis()
Model.fit(X_train,y_train)
y_pred=Model.predict(X_test)
print('accuracy is ',accuracy_score(y_pred,y_test))
from sklearn.cluster import KMeans
km = pd.read_csv("k-mean/km.csv")
km.head()
K_clusters = range(1,8)
kmeans = [KMeans(n_clusters=i) for i in K_clusters]
Y_axis = km[['latitude']]
X_axis = km[['longitude']]
# score = [kmeans[i].fit(Y_axis).score(Y_axis) for i in range(len(kmeans))]
# plt.plot(K_clusters, score)
# plt.xlabel('Number of Clusters')
# plt.ylabel('Score')
# plt.show()
kmeans = KMeans(n_clusters = 3, init ='k-means++')
kmeans.fit(km[km.columns[1:3]])
km['cluster_label'] = kmeans.fit_predict(km[km.columns[1:3]])
centers = kmeans.cluster_centers_
labels = kmeans.predict(km[km.columns[1:3]])
km.cluster_label.unique()
km.plot.scatter(x = 'latitude', y = 'longitude', c=labels, s=50, cmap='viridis')
plt.scatter(centers[:, 0], centers[:, 1], c='black', s=100, alpha=0.5)
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPool2D
import tensorflow as tf
train_data = pd.read_csv("digit-recognizer/train.csv")
test_data = pd.read_csv("digit-recognizer/test.csv")
train_data.head()
X = np.array(train_data.drop("label", axis=1)).astype('float32')
y = np.array(train_data['label']).astype('float32')
for i in range(9):
plt.subplot(3,3,i+1)
plt.xticks([])
plt.yticks([])
plt.grid(False)
plt.imshow(X[i].reshape(28, 28), cmap=plt.cm.binary)
plt.xlabel(y[i])
plt.show()
X = X / 255.0
X = X.reshape(-1, 28, 28, 1)
y = to_categorical(y)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2)
X_test = np.array(test_data).astype('float32')
X_test = X_test / 255.0
X_test = X_test.reshape(-1, 28, 28, 1)
plt.figure(figsize=(10,10))
model = Sequential()
model.add(Conv2D(filters = 32, kernel_size = (5,5),padding = 'Same',
activation ='relu', input_shape = (28,28,1)))
model.add(Conv2D(filters = 32, kernel_size = (5,5),padding = 'Same',
activation ='relu'))
model.add(MaxPool2D(pool_size=(2,2)))
model.add(Dropout(0.25))
model.add(Conv2D(filters = 64, kernel_size = (3,3),padding = 'Same',
activation ='relu'))
model.add(Conv2D(filters = 64, kernel_size = (3,3),padding = 'Same',
activation ='relu'))
model.add(MaxPool2D(pool_size=(2,2), strides=(2,2)))
model.add(Dropout(0.25))
model.add(Flatten())
model.add(Dense(256, activation = "relu"))
model.add(Dropout(0.5))
model.add(Dense(10, activation = "softmax"))
model.summary()
from tensorflow.keras.utils import plot_model
plot_model(model, to_file='model1.png')
#increse to epochs to 30 for better accuracy
model.compile(optimizer='adam', loss="categorical_crossentropy", metrics=["accuracy"])
history = model.fit(X_train, y_train, epochs=10, batch_size=85, validation_data=(X_val, y_val))
accuracy = history.history['accuracy']
val_accuracy = history.history['val_accuracy']
epochs = range(len(accuracy))
plt.plot(epochs, accuracy, 'bo', label='Training accuracy')
plt.plot(epochs, val_accuracy, 'b', label='Validation accuracy')
plt.show()
print(model.evaluate(X_val, y_val))
import math
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import Dense, LSTM
lstm = pd.read_csv("nyse/prices.csv")
lstm = lstm[lstm['symbol']=="NFLX"]
lstm['date'] = pd.to_datetime(lstm['date'])
lstm.set_index('date',inplace=True)
lstm = lstm.reset_index()
lstm.head()
data = lstm.filter(['close'])
dataset = data.values
training_data_len = math.ceil(len(dataset)*.75)
scaler = MinMaxScaler(feature_range=(0,1))
scaled_data = scaler.fit_transform(dataset)
train_data = scaled_data[0:training_data_len, :]
x_train = []
y_train = []
for i in range(60,len(train_data)):
x_train.append(train_data[i-60:i, 0])
y_train.append(train_data[i,0])
x_train,y_train = np.array(x_train), np.array(y_train)
x_train = np.reshape(x_train,(x_train.shape[0],x_train.shape[1],1))
model =Sequential()
model.add(LSTM(64,return_sequences=True, input_shape=(x_train.shape[1],1)))
model.add(LSTM(64, return_sequences= False))
model.add(Dense(32))
model.add(Dense(1))
model.summary()
from tensorflow.keras.utils import plot_model
plot_model(model, to_file='model1.png')
model.compile(optimizer='adam', loss='mean_squared_error')
model.fit(x_train,y_train, batch_size=85, epochs=20)
test_data= scaled_data[training_data_len-60:, :]
x_test = []
y_test = dataset[training_data_len:,:]
for i in range(60,len(test_data)):
x_test.append(test_data[i-60:i,0])
x_test = np.array(x_test)
x_test = np.reshape(x_test, (x_test.shape[0], x_test.shape[1],1))
predictions = model.predict(x_test)
predictions = scaler.inverse_transform(predictions)
rmse = np.sqrt(np.mean(predictions - y_test)**2)
rmse
from sklearn.datasets import make_blobs
from sklearn import datasets
class PCA:
def __init__(self, n_components):
self.n_components = n_components
self.components = None
self.mean = None
def fit(self, X):
self.mean = np.mean(X, axis=0)
X = X - self.mean
cov = np.cov(X.T)
evalue, evector = np.linalg.eig(cov)
eigenvectors = evector.T
idxs = np.argsort(evalue)[::-1]
evalue = evalue[idxs]
evector = evector[idxs]
self.components = evector[0:self.n_components]
def transform(self, X):
#project data
X = X - self.mean
return(np.dot(X, self.components.T))
data = datasets.load_iris()
X = data.data
y = data.target
pca = PCA(2)
pca.fit(X)
X_projected = pca.transform(X)
x1 = X_projected[:,0]
x2 = X_projected[:,1]
plt.scatter(x1,x2,c=y,edgecolor='none',alpha=0.8,cmap=plt.cm.get_cmap('viridis',3))
plt.xlabel('Principal Component 1')
plt.ylabel('Principal Component 2')
plt.colorbar()
plt.show()
df = pd.read_csv('supermarket/GroceryStoreDataSet.csv',names=['products'],header=None)
data = list(df["products"].apply(lambda x:x.split(',')))
data
from mlxtend.frequent_patterns import apriori
from mlxtend.preprocessing import TransactionEncoder
te = TransactionEncoder()
te_data = te.fit(data).transform(data)
df = pd.DataFrame(te_data,columns=te.columns_)
df1 = apriori(df,min_support=0.01,use_colnames=True)
df1.head()
import plotly.offline as py
import plotly.express as px
from fbprophet import Prophet
from fbprophet.plot import plot_plotly, add_changepoints_to_plot
pred = pd.read_csv("coronavirus-2019ncov/covid-19-all.csv")
pred = pred.fillna(0)
predgrp = pred.groupby("Date")[["Confirmed","Recovered","Deaths"]].sum().reset_index()
pred_cnfrm = predgrp.loc[:,["Date","Confirmed"]]
pr_data = pred_cnfrm
pr_data.columns = ['ds','y']
pr_data.head()
m=Prophet()
m.fit(pr_data)
future=m.make_future_dataframe(periods=15)
forecast=m.predict(future)
forecast
fig = plot_plotly(m, forecast)
py.iplot(fig)
fig = m.plot(forecast,xlabel='Date',ylabel='Confirmed Count')
import datetime
from statsmodels.tsa.arima_model import ARIMA
ar = pd.read_csv("competitive-data-science-predict-future-sales/sales_train.csv")
ar.date=ar.date.apply(lambda x:datetime.datetime.strptime(x, '%d.%m.%Y'))
ar=ar.groupby(["date_block_num"])["item_cnt_day"].sum()
ar.index=pd.date_range(start = '2013-01-01',end='2015-10-01', freq = 'MS')
ar=ar.reset_index()
ar=ar.loc[:,["index","item_cnt_day"]]
ar.columns = ['confirmed_date','count']
ar.head()
model = ARIMA(ar['count'].values, order=(1, 2, 1))
fit_model = model.fit(trend='c', full_output=True, disp=True)
fit_model.summary()
fit_model.plot_predict()
plt.title('Forecast vs Actual')
pd.DataFrame(fit_model.resid).plot()
forcast = fit_model.forecast(steps=6)
pred_y = forcast[0].tolist()
pred = pd.DataFrame(pred_y)